# -*- coding: utf-8 -*-
# time: 2025/5/10 09:18
# file: tf01.py
# author: hanson
"""
小模型推理
Pythia-70m需要约1GB GPU内存

"""
import torch
from datasets import load_dataset

from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments

from base.tansformers_pip import generator
from turn_found.small_tf.tf02_dataSet_modify import get_modify_dataSet

# Load the fine-tuned tokenizer and model once from the local checkpoint dir.
tokenizer = AutoTokenizer.from_pretrained("./my_finetuned")
model = AutoModelForCausalLM.from_pretrained("./my_finetuned")
from transformers import pipeline

# Move the model to GPU when available (Pythia-70m needs ~1GB of GPU memory).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Build the generation pipeline from the ALREADY loaded model/tokenizer.
# Previously this passed model="./my_finetuned", which re-read the whole
# checkpoint from disk a second time and silently discarded the .to(device)
# placement above. Reusing the objects keeps the model on the chosen device
# and halves startup time.
# NOTE: this rebinds the name `generator` imported from base.tansformers_pip;
# the imported helper is shadowed from here on.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Single-prompt generation smoke test; do_sample=True makes output
# non-deterministic, so we only print it rather than assert on it.
test_input = "question:Can Lamini play games or tell jokes? Can it be my gaming buddy?\nanswer:"
output = generator(test_input, max_length=200, do_sample=True)
print(output)